{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from requests_html import HTMLSession\n",
    "import requests_html\n",
    "import pandas as pd\n",
    "import urllib.parse"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 学校要闻\n",
    "session = HTMLSession()\n",
    "r = session.get(\"https://www.nfu.edu.cn/xxyw/index.htm\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "91\n"
     ]
    }
   ],
   "source": [
    "for i in range(1, 100):\n",
    "    effective_url = session.get('https://www.nfu.edu.cn/xxyw/index'+str(i)+'.htm')\n",
    "    if effective_url.status_code != 200:\n",
    "        print(i)\n",
    "        break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['https://www.nfu.edu.cn/xxyw/index.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index1.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index2.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index3.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index4.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index5.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index6.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index7.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index8.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index9.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index10.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index11.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index12.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index13.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index14.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index15.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index16.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index17.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index18.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index19.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index20.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index21.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index22.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index23.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index24.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index25.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index26.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index27.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index28.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index29.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index30.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index31.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index32.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index33.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index34.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index35.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index36.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index37.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index38.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index39.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index40.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index41.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index42.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index43.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index44.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index45.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index46.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index47.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index48.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index49.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index50.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index51.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index52.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index53.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index54.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index55.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index56.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index57.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index58.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index59.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index60.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index61.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index62.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index63.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index64.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index65.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index66.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index67.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index68.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index69.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index70.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index71.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index72.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index73.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index74.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index75.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index76.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index77.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index78.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index79.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index80.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index81.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index82.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index83.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index84.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index85.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index86.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index87.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index88.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index89.htm',\n",
       " 'https://www.nfu.edu.cn/xxyw/index90.htm']"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "url_group_one = ['https://www.nfu.edu.cn/xxyw/index'+str(i)+'.htm' for i in range(1,91)]\n",
    "url_group_one.insert(0,'https://www.nfu.edu.cn/xxyw/index.htm')\n",
    "url_group_one"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'https://www.nfu.edu.cn/xxyw/5b71d46d3b114859ae92f7535a7d60c9.htm'},\n",
       " {'https://www.nfu.edu.cn/xxyw/f9bcd8092b494a04becfaf48b3138e20.htm'},\n",
       " {'https://www.nfu.edu.cn/xxyw/f0002a2424f34ad8b258adb1d07ca28b.htm'},\n",
       " {'https://www.nfu.edu.cn/xxyw/48b0929919ec4d2d9a2cdc278fc884ea.htm'},\n",
       " {'https://www.nfu.edu.cn/xxyw/0d7bd841484a42a69d241e79365b6290.htm'},\n",
       " {'https://www.nfu.edu.cn/xxyw/debb2f222e024cbda5d2644acb6c552c.htm'},\n",
       " {'https://www.nfu.edu.cn/xxyw/e5378134dbaf4b7b88d3003f1cd99e59.htm'},\n",
       " {'https://www.nfu.edu.cn/xxyw/7c865b16b203467ab6ddf5569f73e5c1.htm'},\n",
       " {'https://www.nfu.edu.cn/xxyw/28b0ad0eee8149e6b7f4ae65395910ff.htm'},\n",
       " {'https://www.nfu.edu.cn/xxyw/c48c33c8f744430eb9417b800a8b2e3f.htm'},\n",
       " {'https://www.nfu.edu.cn/xxyw/395b8e2ba5df47c59d080d50d1113be1.htm'},\n",
       " {'https://www.nfu.edu.cn/xxyw/59bda093ced440f78c638ade40ab0b93.htm'},\n",
       " {'https://www.nfu.edu.cn/xxyw/1af5590575b74762b624f048b5ad79f4.htm'},\n",
       " {'https://www.nfu.edu.cn/xxyw/4e32521de0da4d21979182e1b114a964.htm'},\n",
       " {'https://www.nfu.edu.cn/xxyw/23279088871e4b89b8eab2e7fbc77b17.htm'},\n",
       " {'https://www.nfu.edu.cn/xxyw/a5de3999469447b488857144f58f8c27.htm'},\n",
       " {'https://www.nfu.edu.cn/xxyw/6273fd9185b54b20a0af15b9878f1d2c.htm'},\n",
       " {'https://www.nfu.edu.cn/xxyw/a1f9ac1d39704e4d8136478ec97e3635.htm'},\n",
       " {'https://www.nfu.edu.cn/xxyw/c438a1ec6db5446faf76617654b5ca55.htm'},\n",
       " {'https://www.nfu.edu.cn/xxyw/f28729353ff749b9b170825ffe346949.htm'}]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "list_URL=[i.absolute_links for i in r.html.xpath('//div[@class=\"news_title\"]/a')]\n",
    "list_URL"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "ParseResult(scheme='https', netloc='www.nfu.edu.cn', path='/xxyw/index.htm', params='', query='', fragment='')"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "base_url = r.url\n",
    "nfu_urlparse = urllib.parse.urlparse(base_url)\n",
    "nfu_urlparse"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>标题</th>\n",
       "      <th>链接</th>\n",
       "      <th>日期</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>我校召开高校教师职称评审 政策解读专题报告会</td>\n",
       "      <td>{https://www.nfu.edu.cn/xxyw/f0002a2424f34ad8b...</td>\n",
       "      <td>2021-04-10</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>快！来为我校大学生国旗护卫队参赛点赞！</td>\n",
       "      <td>{https://www.nfu.edu.cn/xxyw/5b71d46d3b114859a...</td>\n",
       "      <td>2021-04-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>我校承办首届 “新时代从商培养工程”</td>\n",
       "      <td>{https://www.nfu.edu.cn/xxyw/0d7bd841484a42a69...</td>\n",
       "      <td>2021-04-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>专注当下，冲刺高考，奋斗出最美的青春</td>\n",
       "      <td>{https://www.nfu.edu.cn/xxyw/f9bcd8092b494a04b...</td>\n",
       "      <td>2021-04-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>我校召开2021年一流专业、一流课程、教学成果奖申报工作推进会</td>\n",
       "      <td>{https://www.nfu.edu.cn/xxyw/48b0929919ec4d2d9...</td>\n",
       "      <td>2021-04-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1799</th>\n",
       "      <td>19</td>\n",
       "      <td>我院经管系2013级创新实验国际班开班典礼隆重举行</td>\n",
       "      <td>{https://www.nfu.edu.cn/xxyw/2af0127ce4234c7aa...</td>\n",
       "      <td>2013-09-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1800</th>\n",
       "      <td>0</td>\n",
       "      <td>学院教学工作会议顺利召开</td>\n",
       "      <td>{https://www.nfu.edu.cn/xxyw/b844901be7a6412eb...</td>\n",
       "      <td>2013-09-26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1801</th>\n",
       "      <td>1</td>\n",
       "      <td>我院召开新进教职工座谈会</td>\n",
       "      <td>{https://www.nfu.edu.cn/xxyw/460879ee62c94531b...</td>\n",
       "      <td>2013-09-25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1802</th>\n",
       "      <td>2</td>\n",
       "      <td>我院2013级新生军训正式开始</td>\n",
       "      <td>{https://www.nfu.edu.cn/xxyw/9ae5ab09744e4d808...</td>\n",
       "      <td>2013-09-17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1803</th>\n",
       "      <td>3</td>\n",
       "      <td>我院2013级新生“安全法纪教育”讲座顺利举行</td>\n",
       "      <td>{https://www.nfu.edu.cn/xxyw/e8f0aa3bb74d43cbb...</td>\n",
       "      <td>2013-09-17</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1804 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      index                               标题  \\\n",
       "2         2           我校召开高校教师职称评审 政策解读专题报告会   \n",
       "0         0              快！来为我校大学生国旗护卫队参赛点赞！   \n",
       "4         4               我校承办首届 “新时代从商培养工程”   \n",
       "1         1               专注当下，冲刺高考，奋斗出最美的青春   \n",
       "3         3  我校召开2021年一流专业、一流课程、教学成果奖申报工作推进会   \n",
       "...     ...                              ...   \n",
       "1799     19        我院经管系2013级创新实验国际班开班典礼隆重举行   \n",
       "1800      0                     学院教学工作会议顺利召开   \n",
       "1801      1                     我院召开新进教职工座谈会   \n",
       "1802      2                  我院2013级新生军训正式开始   \n",
       "1803      3          我院2013级新生“安全法纪教育”讲座顺利举行   \n",
       "\n",
       "                                                     链接          日期  \n",
       "2     {https://www.nfu.edu.cn/xxyw/f0002a2424f34ad8b...  2021-04-10  \n",
       "0     {https://www.nfu.edu.cn/xxyw/5b71d46d3b114859a...  2021-04-09  \n",
       "4     {https://www.nfu.edu.cn/xxyw/0d7bd841484a42a69...  2021-04-02  \n",
       "1     {https://www.nfu.edu.cn/xxyw/f9bcd8092b494a04b...  2021-04-02  \n",
       "3     {https://www.nfu.edu.cn/xxyw/48b0929919ec4d2d9...  2021-04-02  \n",
       "...                                                 ...         ...  \n",
       "1799  {https://www.nfu.edu.cn/xxyw/2af0127ce4234c7aa...  2013-09-29  \n",
       "1800  {https://www.nfu.edu.cn/xxyw/b844901be7a6412eb...  2013-09-26  \n",
       "1801  {https://www.nfu.edu.cn/xxyw/460879ee62c94531b...  2013-09-25  \n",
       "1802  {https://www.nfu.edu.cn/xxyw/9ae5ab09744e4d808...  2013-09-17  \n",
       "1803  {https://www.nfu.edu.cn/xxyw/e8f0aa3bb74d43cbb...  2013-09-17  \n",
       "\n",
       "[1804 rows x 4 columns]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "list_df_1 = []\n",
    "url_group_1 = []\n",
    "for i in url_group_one:\n",
    "    e = session.get(i)\n",
    "    url_group_1.append(e)\n",
    "for j in url_group_1:\n",
    "    df_one = pd.DataFrame( {\n",
    "         \"标题\": j.html.xpath('//div[@class=\"news_title\"]/a/@title'),\n",
    "         \"链接\": [i.absolute_links for i in j.html.xpath('//div[@class=\"news_title\"]/a')],\n",
    "         \"日期\": j.html.xpath('//font[@class=\"right-more\"]/text()'),\n",
    "     } )    \n",
    "    list_df_1.append(df_one)\n",
    "df_all_1 = pd.concat(list_df_1).reset_index().sort_values(by='日期',ascending=False)\n",
    "display(df_all_1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "writer = pd.ExcelWriter('out_data.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "200"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "session = HTMLSession()\n",
    "r_two = session.get( 'https://www.nfu.edu.cn/xydt/index.htm')\n",
    "r_two.status_code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "85\n"
     ]
    }
   ],
   "source": [
    "for i in range(1, 100):\n",
    "    effective_url = session.get('https://www.nfu.edu.cn/xydt/index'+str(i)+'.htm')\n",
    "    if effective_url.status_code != 200:\n",
    "        print(i)\n",
    "        break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['https://www.nfu.edu.cn/xydt/index.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index1.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index2.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index3.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index4.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index5.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index6.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index7.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index8.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index9.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index10.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index11.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index12.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index13.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index14.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index15.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index16.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index17.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index18.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index19.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index20.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index21.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index22.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index23.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index24.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index25.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index26.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index27.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index28.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index29.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index30.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index31.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index32.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index33.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index34.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index35.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index36.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index37.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index38.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index39.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index40.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index41.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index42.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index43.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index44.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index45.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index46.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index47.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index48.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index49.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index50.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index51.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index52.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index53.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index54.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index55.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index56.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index57.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index58.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index59.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index60.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index61.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index62.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index63.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index64.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index65.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index66.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index67.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index68.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index69.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index70.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index71.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index72.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index73.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index74.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index75.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index76.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index77.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index78.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index79.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index80.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index81.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index82.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index83.htm',\n",
       " 'https://www.nfu.edu.cn/xydt/index84.htm']"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "url_group_two = ['https://www.nfu.edu.cn/xydt/index'+str(i)+'.htm' for i in range(1,85)]\n",
    "url_group_two.insert(0,'https://www.nfu.edu.cn/xydt/index.htm')\n",
    "url_group_two\n",
    " "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>标题</th>\n",
       "      <th>链接</th>\n",
       "      <th>日期</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>【国奖映像】苏绮筠：让优秀成为习惯</td>\n",
       "      <td>{https://www.nfu.edu.cn/xydt/7dfe6fcd15fd49559...</td>\n",
       "      <td>2021-04-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>护理与健康学院2020-2021年度第二学期3月份团支部委员会顺利举行</td>\n",
       "      <td>{https://www.nfu.edu.cn/xydt/9ec16bf90e164071b...</td>\n",
       "      <td>2021-04-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>【国奖映像】陈宇：心怀热爱，奔赴梦想</td>\n",
       "      <td>{https://www.nfu.edu.cn/xydt/09627d3243ee4578a...</td>\n",
       "      <td>2021-04-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>护理与健康学院2020-2021第二学期团员培训课程第2讲圆满结束</td>\n",
       "      <td>{https://www.nfu.edu.cn/xydt/debea203b0c84a309...</td>\n",
       "      <td>2021-04-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>商学院电子商务专业召开申请调整学位授予学科门类 专家评审会</td>\n",
       "      <td>{https://www.nfu.edu.cn/xydt/cf4420785b9046e99...</td>\n",
       "      <td>2021-04-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1683</th>\n",
       "      <td>3</td>\n",
       "      <td>广东技术师范学院大学英语部与我院大学英语教学中心教师交流会顺利举行</td>\n",
       "      <td>{https://www.nfu.edu.cn/xydt/5cc461d4a37a4afb8...</td>\n",
       "      <td>2016-01-08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1684</th>\n",
       "      <td>4</td>\n",
       "      <td>经济学与商务管理系顺利召开办公培训会议</td>\n",
       "      <td>{https://www.nfu.edu.cn/xydt/9b59863e5051412d8...</td>\n",
       "      <td>2016-01-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1685</th>\n",
       "      <td>5</td>\n",
       "      <td>严谨为学，诚信迎考--工商管理系班级期末总结暨诚信考试动员大会圆满结束</td>\n",
       "      <td>{https://www.nfu.edu.cn/xydt/3d02b255690e4ce79...</td>\n",
       "      <td>2016-01-04</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1686</th>\n",
       "      <td>6</td>\n",
       "      <td>以学生为本，做优秀学生干部——我院“青马工程”第二讲举行</td>\n",
       "      <td>{https://www.nfu.edu.cn/xydt/0132532dbb0e448d8...</td>\n",
       "      <td>2015-10-30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1687</th>\n",
       "      <td>7</td>\n",
       "      <td>经济学与商务管理系党总支第17期入党积极分子实践活动圆满结束</td>\n",
       "      <td>{https://www.nfu.edu.cn/xydt/2d20a911787b4cc09...</td>\n",
       "      <td>2015-03-28</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1688 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "      index                                   标题  \\\n",
       "0         0                    【国奖映像】苏绮筠：让优秀成为习惯   \n",
       "3         3  护理与健康学院2020-2021年度第二学期3月份团支部委员会顺利举行   \n",
       "1         1                   【国奖映像】陈宇：心怀热爱，奔赴梦想   \n",
       "2         2    护理与健康学院2020-2021第二学期团员培训课程第2讲圆满结束   \n",
       "4         4        商学院电子商务专业召开申请调整学位授予学科门类 专家评审会   \n",
       "...     ...                                  ...   \n",
       "1683      3    广东技术师范学院大学英语部与我院大学英语教学中心教师交流会顺利举行   \n",
       "1684      4                  经济学与商务管理系顺利召开办公培训会议   \n",
       "1685      5  严谨为学，诚信迎考--工商管理系班级期末总结暨诚信考试动员大会圆满结束   \n",
       "1686      6         以学生为本，做优秀学生干部——我院“青马工程”第二讲举行   \n",
       "1687      7       经济学与商务管理系党总支第17期入党积极分子实践活动圆满结束   \n",
       "\n",
       "                                                     链接          日期  \n",
       "0     {https://www.nfu.edu.cn/xydt/7dfe6fcd15fd49559...  2021-04-09  \n",
       "3     {https://www.nfu.edu.cn/xydt/9ec16bf90e164071b...  2021-04-09  \n",
       "1     {https://www.nfu.edu.cn/xydt/09627d3243ee4578a...  2021-04-09  \n",
       "2     {https://www.nfu.edu.cn/xydt/debea203b0c84a309...  2021-04-09  \n",
       "4     {https://www.nfu.edu.cn/xydt/cf4420785b9046e99...  2021-04-07  \n",
       "...                                                 ...         ...  \n",
       "1683  {https://www.nfu.edu.cn/xydt/5cc461d4a37a4afb8...  2016-01-08  \n",
       "1684  {https://www.nfu.edu.cn/xydt/9b59863e5051412d8...  2016-01-04  \n",
       "1685  {https://www.nfu.edu.cn/xydt/3d02b255690e4ce79...  2016-01-04  \n",
       "1686  {https://www.nfu.edu.cn/xydt/0132532dbb0e448d8...  2015-10-30  \n",
       "1687  {https://www.nfu.edu.cn/xydt/2d20a911787b4cc09...  2015-03-28  \n",
       "\n",
       "[1688 rows x 4 columns]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "list_df_2 = []\n",
    "url_group_2 = []\n",
    "for i in url_group_two:\n",
    "    e = session.get(i)\n",
    "    url_group_2.append(e)\n",
    "    \n",
    "for j in url_group_2:\n",
    "    df_two = pd.DataFrame( {\n",
    "         \"标题\": j.html.xpath('//div[@class=\"news_title\"]/a/@title'),\n",
    "         \"链接\": [i.absolute_links for i in j.html.xpath('//div[@class=\"news_title\"]/a')],\n",
    "         \"日期\": j.html.xpath('//font[@class=\"right-more\"]/text()'),\n",
    "     } )    \n",
    "    list_df_2.append(df_two)\n",
    "df_all_2 = pd.concat(list_df_2).reset_index().sort_values(by='日期',ascending=False)\n",
    "display(df_all_2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>标题</th>\n",
       "      <th>链接</th>\n",
       "      <th>日期</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>【国奖映像】苏绮筠：让优秀成为习惯</td>\n",
       "      <td>7dfe6fcd15fd495597cbd282de863733.htm</td>\n",
       "      <td>2021-04-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>【国奖映像】陈宇：心怀热爱，奔赴梦想</td>\n",
       "      <td>09627d3243ee4578ac69be2881abd8b3.htm</td>\n",
       "      <td>2021-04-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>护理与健康学院2020-2021第二学期团员培训课程第2讲圆满结束</td>\n",
       "      <td>debea203b0c84a3092e6b5416cc4c2f1.htm</td>\n",
       "      <td>2021-04-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>护理与健康学院2020-2021年度第二学期3月份团支部委员会顺利举行</td>\n",
       "      <td>9ec16bf90e164071b68a57332c5fe020.htm</td>\n",
       "      <td>2021-04-09</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>商学院电子商务专业召开申请调整学位授予学科门类 专家评审会</td>\n",
       "      <td>cf4420785b9046e99851413a1fb1b6f7.htm</td>\n",
       "      <td>2021-04-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>会计学院大一年级大会顺利召开</td>\n",
       "      <td>16f4c5f4bd284caebfe79cd5d66e288b.htm</td>\n",
       "      <td>2021-04-06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>广州新华学院会计学院刘运国院长一行莅临我院访问</td>\n",
       "      <td>935f580040704990a4e396fa8091ee30.htm</td>\n",
       "      <td>2021-04-06</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>【国奖映像】蒋晓琳：明确目标，为之努力</td>\n",
       "      <td>9611d110ec8a486587ab4020171ee9f5.htm</td>\n",
       "      <td>2021-04-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>励能计划2021：你选哪一项？</td>\n",
       "      <td>8b2414ee7cca45d88c4217dd13f8f8ec.htm</td>\n",
       "      <td>2021-04-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>党团同行传薪火，红色循迹筑初心——商学院党团同行重走“东江纵队”红色之路</td>\n",
       "      <td>a9523b72a34e4143afa9b38879ecba0c.htm</td>\n",
       "      <td>2021-04-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>广州南方学院老年与慢病护理研究中心学术沙龙系列第3期圆满结束</td>\n",
       "      <td>f8434c9f092348c2a4a3c1ac8727a8fd.htm</td>\n",
       "      <td>2021-04-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>凝心聚力 共谱新篇——商学院召开新学期全体教职工大会</td>\n",
       "      <td>d60d33983337463390ef99385afce119.htm</td>\n",
       "      <td>2021-04-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>青马工程｜“回首峥嵘岁月，领悟红船精神”——商学院百年党史宣讲活动</td>\n",
       "      <td>4b0c8e69b5074d28badd20cb55436009.htm</td>\n",
       "      <td>2021-04-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>护理与健康学院“教学相长，从教学中成长”暨青年教师专题讲座与交流活动圆满结束</td>\n",
       "      <td>bdda4dfbda944a3eb84612b7045620f4.htm</td>\n",
       "      <td>2021-04-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>党建观摩拓思路，对照标杆“取真经”——我校商学院师生团队赴广东外语外贸大学南国商学院管理学院...</td>\n",
       "      <td>96e05388e3fa43de9a8f446b083875f8.htm</td>\n",
       "      <td>2021-03-30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>护理与健康学院直属党支部2021年春季入党积极分子培训圆满结束</td>\n",
       "      <td>25dc7cb574284be18a6c9a0640e5aca3.htm</td>\n",
       "      <td>2021-03-30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>成长在文传我院举行2021年春季学期全体教职工大会</td>\n",
       "      <td>b9777111c2194e7b85143431ab4706a7.htm</td>\n",
       "      <td>2021-03-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>“远离糖尿病，筑起健康防线” 护理与健康学院寒假社会实践调查成果汇报展示圆满结束</td>\n",
       "      <td>2f3dcc0f4400419e8e42af09fde3c251.htm</td>\n",
       "      <td>2021-03-29</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>倾心指导促研学，凝心聚力谋发展——大英中心教学研究座谈会顺利召开</td>\n",
       "      <td>c3846031b4c0444a99e0dfd90047c046.htm</td>\n",
       "      <td>2021-03-26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>护理与健康学院第37期师生面对面顺利开展</td>\n",
       "      <td>5e165fdaee834899891ba5b3eea69bc9.htm</td>\n",
       "      <td>2021-03-26</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                   标题  \\\n",
       "0                                   【国奖映像】苏绮筠：让优秀成为习惯   \n",
       "1                                  【国奖映像】陈宇：心怀热爱，奔赴梦想   \n",
       "2                   护理与健康学院2020-2021第二学期团员培训课程第2讲圆满结束   \n",
       "3                 护理与健康学院2020-2021年度第二学期3月份团支部委员会顺利举行   \n",
       "4                       商学院电子商务专业召开申请调整学位授予学科门类 专家评审会   \n",
       "5                                      会计学院大一年级大会顺利召开   \n",
       "6                             广州新华学院会计学院刘运国院长一行莅临我院访问   \n",
       "7                                 【国奖映像】蒋晓琳：明确目标，为之努力   \n",
       "8                                     励能计划2021：你选哪一项？   \n",
       "9                党团同行传薪火，红色循迹筑初心——商学院党团同行重走“东江纵队”红色之路   \n",
       "10                     广州南方学院老年与慢病护理研究中心学术沙龙系列第3期圆满结束   \n",
       "11                         凝心聚力 共谱新篇——商学院召开新学期全体教职工大会   \n",
       "12                  青马工程｜“回首峥嵘岁月，领悟红船精神”——商学院百年党史宣讲活动   \n",
       "13             护理与健康学院“教学相长，从教学中成长”暨青年教师专题讲座与交流活动圆满结束   \n",
       "14  党建观摩拓思路，对照标杆“取真经”——我校商学院师生团队赴广东外语外贸大学南国商学院管理学院...   \n",
       "15                    护理与健康学院直属党支部2021年春季入党积极分子培训圆满结束   \n",
       "16                          成长在文传我院举行2021年春季学期全体教职工大会   \n",
       "17           “远离糖尿病，筑起健康防线” 护理与健康学院寒假社会实践调查成果汇报展示圆满结束   \n",
       "18                   倾心指导促研学，凝心聚力谋发展——大英中心教学研究座谈会顺利召开   \n",
       "19                               护理与健康学院第37期师生面对面顺利开展   \n",
       "\n",
       "                                      链接          日期  \n",
       "0   7dfe6fcd15fd495597cbd282de863733.htm  2021-04-09  \n",
       "1   09627d3243ee4578ac69be2881abd8b3.htm  2021-04-09  \n",
       "2   debea203b0c84a3092e6b5416cc4c2f1.htm  2021-04-09  \n",
       "3   9ec16bf90e164071b68a57332c5fe020.htm  2021-04-09  \n",
       "4   cf4420785b9046e99851413a1fb1b6f7.htm  2021-04-07  \n",
       "5   16f4c5f4bd284caebfe79cd5d66e288b.htm  2021-04-06  \n",
       "6   935f580040704990a4e396fa8091ee30.htm  2021-04-06  \n",
       "7   9611d110ec8a486587ab4020171ee9f5.htm  2021-04-02  \n",
       "8   8b2414ee7cca45d88c4217dd13f8f8ec.htm  2021-04-02  \n",
       "9   a9523b72a34e4143afa9b38879ecba0c.htm  2021-04-02  \n",
       "10  f8434c9f092348c2a4a3c1ac8727a8fd.htm  2021-04-02  \n",
       "11  d60d33983337463390ef99385afce119.htm  2021-04-01  \n",
       "12  4b0c8e69b5074d28badd20cb55436009.htm  2021-04-01  \n",
       "13  bdda4dfbda944a3eb84612b7045620f4.htm  2021-04-01  \n",
       "14  96e05388e3fa43de9a8f446b083875f8.htm  2021-03-30  \n",
       "15  25dc7cb574284be18a6c9a0640e5aca3.htm  2021-03-30  \n",
       "16  b9777111c2194e7b85143431ab4706a7.htm  2021-03-29  \n",
       "17  2f3dcc0f4400419e8e42af09fde3c251.htm  2021-03-29  \n",
       "18  c3846031b4c0444a99e0dfd90047c046.htm  2021-03-26  \n",
       "19  5e165fdaee834899891ba5b3eea69bc9.htm  2021-03-26  "
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Tabulate the listing already fetched into r_two: one row per news item.\n",
    "title_link_xpath = '//div[@class=\"news_title\"]/a'\n",
    "df_two = pd.DataFrame({\n",
    "    \"标题\": r_two.html.xpath(title_link_xpath + '/@title'),\n",
    "    \"链接\": r_two.html.xpath(title_link_xpath + '/@href'),  # raw href attribute values\n",
    "    \"日期\": r_two.html.xpath('//font[@class=\"right-more\"]/text()'),\n",
    "})\n",
    "df_two"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Notices section (/tzgg/): start a new session and fetch the first listing page.\n",
    "session = HTMLSession()\n",
    "r_three = session.get('https://www.nfu.edu.cn/tzgg/index.htm')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "35\n"
     ]
    }
   ],
   "source": [
    "# Probe the numbered /tzgg/ listing pages in order and print the first index\n",
    "# whose request does not come back with HTTP 200.\n",
    "for i in range(1, 100):\n",
    "    effective_url = session.get(f'https://www.nfu.edu.cn/tzgg/index{i}.htm')\n",
    "    if effective_url.status_code == 200:\n",
    "        continue\n",
    "    print(i)\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['https://www.nfu.edu.cn/tzgg/index.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index1.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index2.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index3.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index4.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index5.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index6.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index7.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index8.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index9.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index10.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index11.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index12.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index13.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index14.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index15.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index16.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index17.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index18.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index19.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index20.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index21.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index22.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index23.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index24.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index25.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index26.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index27.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index28.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index29.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index30.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index31.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index32.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index33.htm',\n",
       " 'https://www.nfu.edu.cn/tzgg/index34.htm']"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Listing pages of /tzgg/: the unnumbered first page, then index1 .. index34\n",
    "# (35 is the first index the probe cell printed as missing).\n",
    "url_group_three = ['https://www.nfu.edu.cn/tzgg/index.htm'] + [\n",
    "    f'https://www.nfu.edu.cn/tzgg/index{i}.htm' for i in range(1, 35)\n",
    "]\n",
    "url_group_three"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>标题</th>\n",
       "      <th>链接</th>\n",
       "      <th>日期</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>校园管理部关于2021年元旦放假校园生活服务安排的通知</td>\n",
       "      <td>{https://www.nfu.edu.cn/tzgg/cd60e06378e544929...</td>\n",
       "      <td>2020-12-25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>中山大学南方学院关于2021年元旦放假安排的通知</td>\n",
       "      <td>{https://www.nfu.edu.cn/tzgg/16fcbd56eab04220b...</td>\n",
       "      <td>2020-12-17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>关于开展2020年知识产权竞赛的通知</td>\n",
       "      <td>{https://www.nfu.edu.cn/tzgg/155655d4a7e74c769...</td>\n",
       "      <td>2020-12-16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>中山大学南方学院关于举办2020年预防艾滋病巡讲活动的通知</td>\n",
       "      <td>{https://www.nfu.edu.cn/tzgg/f381db0e5b3e4746b...</td>\n",
       "      <td>2020-12-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>关于开展2020年安全知识竞赛的通知</td>\n",
       "      <td>{https://www.nfu.edu.cn/tzgg/ae83ecc6ce894bcb8...</td>\n",
       "      <td>2020-12-03</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>682</th>\n",
       "      <td>2</td>\n",
       "      <td>“南苑青年”系列讲座之第十三讲的通知</td>\n",
       "      <td>{https://www.nfu.edu.cn/tzgg/bbd14d55a99247a79...</td>\n",
       "      <td>2015-04-08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>683</th>\n",
       "      <td>3</td>\n",
       "      <td>中山大学南方学院关于举办“南方湖畔·艺彩纷呈”第七届校园文化艺术节活动通知</td>\n",
       "      <td>{https://www.nfu.edu.cn/tzgg/a8e5e752e409486da...</td>\n",
       "      <td>2015-04-07</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>684</th>\n",
       "      <td>4</td>\n",
       "      <td>学院办公室关于2015年五一放假安排的通知</td>\n",
       "      <td>{https://www.nfu.edu.cn/tzgg/e3f763049ee54cfc8...</td>\n",
       "      <td>2015-04-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>685</th>\n",
       "      <td>5</td>\n",
       "      <td>中山大学南方学院关于2015年公共机构节能宣传作品征集活动的通知</td>\n",
       "      <td>{https://www.nfu.edu.cn/tzgg/f3ae1aa3ccdb4d87b...</td>\n",
       "      <td>2015-04-01</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>686</th>\n",
       "      <td>6</td>\n",
       "      <td>关于开展校园网络和运营商移动网络使用情况调查的通知</td>\n",
       "      <td>{https://www.nfu.edu.cn/tzgg/6de44f6a618540ef8...</td>\n",
       "      <td>1970-01-01</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>687 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     index                                     标题  \\\n",
       "0        0            校园管理部关于2021年元旦放假校园生活服务安排的通知   \n",
       "1        1               中山大学南方学院关于2021年元旦放假安排的通知   \n",
       "2        2                     关于开展2020年知识产权竞赛的通知   \n",
       "3        3          中山大学南方学院关于举办2020年预防艾滋病巡讲活动的通知   \n",
       "4        4                     关于开展2020年安全知识竞赛的通知   \n",
       "..     ...                                    ...   \n",
       "682      2                     “南苑青年”系列讲座之第十三讲的通知   \n",
       "683      3  中山大学南方学院关于举办“南方湖畔·艺彩纷呈”第七届校园文化艺术节活动通知   \n",
       "684      4                  学院办公室关于2015年五一放假安排的通知   \n",
       "685      5       中山大学南方学院关于2015年公共机构节能宣传作品征集活动的通知   \n",
       "686      6              关于开展校园网络和运营商移动网络使用情况调查的通知   \n",
       "\n",
       "                                                    链接          日期  \n",
       "0    {https://www.nfu.edu.cn/tzgg/cd60e06378e544929...  2020-12-25  \n",
       "1    {https://www.nfu.edu.cn/tzgg/16fcbd56eab04220b...  2020-12-17  \n",
       "2    {https://www.nfu.edu.cn/tzgg/155655d4a7e74c769...  2020-12-16  \n",
       "3    {https://www.nfu.edu.cn/tzgg/f381db0e5b3e4746b...  2020-12-03  \n",
       "4    {https://www.nfu.edu.cn/tzgg/ae83ecc6ce894bcb8...  2020-12-03  \n",
       "..                                                 ...         ...  \n",
       "682  {https://www.nfu.edu.cn/tzgg/bbd14d55a99247a79...  2015-04-08  \n",
       "683  {https://www.nfu.edu.cn/tzgg/a8e5e752e409486da...  2015-04-07  \n",
       "684  {https://www.nfu.edu.cn/tzgg/e3f763049ee54cfc8...  2015-04-01  \n",
       "685  {https://www.nfu.edu.cn/tzgg/f3ae1aa3ccdb4d87b...  2015-04-01  \n",
       "686  {https://www.nfu.edu.cn/tzgg/6de44f6a618540ef8...  1970-01-01  \n",
       "\n",
       "[687 rows x 4 columns]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Scrape every /tzgg/ listing page into a single table of title, link and date.\n",
    "list_df_3 = []\n",
    "# Download all pages up front; url_group_3 holds the responses, one per listing URL.\n",
    "url_group_3 = [session.get(page_url) for page_url in url_group_three]\n",
    "\n",
    "for resp in url_group_3:\n",
    "    df_three = pd.DataFrame({\n",
    "        \"标题\": resp.html.xpath('//div[@class=\"news_title\"]/a/@title'),\n",
    "        # Store one absolute URL string per row. Element.absolute_links returns a\n",
    "        # set, which is why the column used to display as '{https://...}'.\n",
    "        \"链接\": [\n",
    "            urllib.parse.urljoin(resp.url, href)\n",
    "            for href in resp.html.xpath('//div[@class=\"news_title\"]/a/@href')\n",
    "        ],\n",
    "        \"日期\": resp.html.xpath('//font[@class=\"right-more\"]/text()'),\n",
    "    })\n",
    "    list_df_3.append(df_three)\n",
    "\n",
    "# Sort newest first, then renumber the rows. drop=True keeps the per-page row\n",
    "# numbers from turning into a stray 'index' column.\n",
    "df_all_3 = (\n",
    "    pd.concat(list_df_3)\n",
    "    .sort_values(by='日期', ascending=False)\n",
    "    .reset_index(drop=True)\n",
    ")\n",
    "display(df_all_3)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "200"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Tender section (/ztb/): start a new session, fetch the first listing page\n",
    "# and show its HTTP status.\n",
    "session = HTMLSession()\n",
    "r_four = session.get('https://www.nfu.edu.cn/ztb/index.htm')\n",
    "r_four.status_code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "22\n"
     ]
    }
   ],
   "source": [
    "# Probe the numbered /ztb/ listing pages in order and print the first index\n",
    "# whose request does not come back with HTTP 200.\n",
    "for i in range(1, 100):\n",
    "    effective_url = session.get(f'https://www.nfu.edu.cn/ztb/index{i}.htm')\n",
    "    if effective_url.status_code == 200:\n",
    "        continue\n",
    "    print(i)\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['https://www.nfu.edu.cn/ztb/index.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index1.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index2.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index3.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index4.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index5.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index6.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index7.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index8.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index9.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index10.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index11.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index12.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index13.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index14.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index15.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index16.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index17.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index18.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index19.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index20.htm',\n",
       " 'https://www.nfu.edu.cn/ztb/index21.htm']"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Listing pages of /ztb/: the unnumbered first page, then index1 .. index21\n",
    "# (22 is the first index the probe cell printed as missing).\n",
    "url_group_four = ['https://www.nfu.edu.cn/ztb/index.htm'] + [\n",
    "    f'https://www.nfu.edu.cn/ztb/index{i}.htm' for i in range(1, 22)\n",
    "]\n",
    "url_group_four"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>标题</th>\n",
       "      <th>链接</th>\n",
       "      <th>日期</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>广州南方学院蚊子、苍蝇、蟑螂消杀及白蚁、红火蚁防治项目招标开标延期公告</td>\n",
       "      <td>{https://www.nfu.edu.cn/ztb/4aa14103a6d34d4283...</td>\n",
       "      <td>2021-04-08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>广州南方学院蚊子、苍蝇、蟑螂消杀及白蚁、红火蚁防治项目 招标公告</td>\n",
       "      <td>{https://www.nfu.edu.cn/ztb/ea8754261f26419080...</td>\n",
       "      <td>2021-04-02</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>中山大学南方学院数字电路基础实验室、电路与模拟电子实验室设备采购项目招标公告</td>\n",
       "      <td>{https://www.nfu.edu.cn/ztb/7226fe9acf3b4757b9...</td>\n",
       "      <td>2021-03-31</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>中山大学南方学院垃圾清运和处理服务项目招标公告</td>\n",
       "      <td>{https://www.nfu.edu.cn/ztb/414b2db5e6c04f99be...</td>\n",
       "      <td>2021-03-17</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>中山大学南方学院2021年度维修、改造工程施工项目中标结果公示</td>\n",
       "      <td>{https://www.nfu.edu.cn/ztb/60c660848ef44283bc...</td>\n",
       "      <td>2021-03-11</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>429</th>\n",
       "      <td>9</td>\n",
       "      <td>中山大学南方学院学生体质健康测试仪采购项目招标公告（第二次）</td>\n",
       "      <td>{https://www.nfu.edu.cn/ztb/ba01c43761e245d493...</td>\n",
       "      <td>2015-03-27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>430</th>\n",
       "      <td>10</td>\n",
       "      <td>中山大学南方学院计算机实验室设备采购项目中标公示</td>\n",
       "      <td>{https://www.nfu.edu.cn/ztb/0020f85b9ef24d0792...</td>\n",
       "      <td>2015-03-26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>431</th>\n",
       "      <td>11</td>\n",
       "      <td>中山大学南方学院电气工程及自动化实验室设备采购项目招标公告（第二次）</td>\n",
       "      <td>{https://www.nfu.edu.cn/ztb/10482a669fc54447aa...</td>\n",
       "      <td>2015-03-26</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>432</th>\n",
       "      <td>12</td>\n",
       "      <td>中山大学南方学院音乐楼阶梯课室座椅采购项目中标公示</td>\n",
       "      <td>{https://www.nfu.edu.cn/ztb/4e5e67a17b7d47cf8c...</td>\n",
       "      <td>2015-03-20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>433</th>\n",
       "      <td>13</td>\n",
       "      <td>中山大学南方学院室内高尔夫模拟设备项目招标公告</td>\n",
       "      <td>{https://www.nfu.edu.cn/ztb/35a1b4dab36a4ae5aa...</td>\n",
       "      <td>2013-12-23</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>434 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     index                                      标题  \\\n",
       "0        0     广州南方学院蚊子、苍蝇、蟑螂消杀及白蚁、红火蚁防治项目招标开标延期公告   \n",
       "1        1        广州南方学院蚊子、苍蝇、蟑螂消杀及白蚁、红火蚁防治项目 招标公告   \n",
       "2        2  中山大学南方学院数字电路基础实验室、电路与模拟电子实验室设备采购项目招标公告   \n",
       "3        3                 中山大学南方学院垃圾清运和处理服务项目招标公告   \n",
       "4        4         中山大学南方学院2021年度维修、改造工程施工项目中标结果公示   \n",
       "..     ...                                     ...   \n",
       "429      9          中山大学南方学院学生体质健康测试仪采购项目招标公告（第二次）   \n",
       "430     10                中山大学南方学院计算机实验室设备采购项目中标公示   \n",
       "431     11      中山大学南方学院电气工程及自动化实验室设备采购项目招标公告（第二次）   \n",
       "432     12               中山大学南方学院音乐楼阶梯课室座椅采购项目中标公示   \n",
       "433     13                 中山大学南方学院室内高尔夫模拟设备项目招标公告   \n",
       "\n",
       "                                                    链接          日期  \n",
       "0    {https://www.nfu.edu.cn/ztb/4aa14103a6d34d4283...  2021-04-08  \n",
       "1    {https://www.nfu.edu.cn/ztb/ea8754261f26419080...  2021-04-02  \n",
       "2    {https://www.nfu.edu.cn/ztb/7226fe9acf3b4757b9...  2021-03-31  \n",
       "3    {https://www.nfu.edu.cn/ztb/414b2db5e6c04f99be...  2021-03-17  \n",
       "4    {https://www.nfu.edu.cn/ztb/60c660848ef44283bc...  2021-03-11  \n",
       "..                                                 ...         ...  \n",
       "429  {https://www.nfu.edu.cn/ztb/ba01c43761e245d493...  2015-03-27  \n",
       "430  {https://www.nfu.edu.cn/ztb/0020f85b9ef24d0792...  2015-03-26  \n",
       "431  {https://www.nfu.edu.cn/ztb/10482a669fc54447aa...  2015-03-26  \n",
       "432  {https://www.nfu.edu.cn/ztb/4e5e67a17b7d47cf8c...  2015-03-20  \n",
       "433  {https://www.nfu.edu.cn/ztb/35a1b4dab36a4ae5aa...  2013-12-23  \n",
       "\n",
       "[434 rows x 4 columns]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Scrape every /ztb/ listing page into a single table of title, link and date.\n",
    "list_df_4 = []\n",
    "# Download all pages up front; url_group_4 holds the responses, one per listing URL.\n",
    "url_group_4 = [session.get(page_url) for page_url in url_group_four]\n",
    "\n",
    "for resp in url_group_4:\n",
    "    df_four = pd.DataFrame({\n",
    "        \"标题\": resp.html.xpath('//div[@class=\"news_title\"]/a/@title'),\n",
    "        # Store one absolute URL string per row. Element.absolute_links returns a\n",
    "        # set, which is why the column used to display as '{https://...}'.\n",
    "        \"链接\": [\n",
    "            urllib.parse.urljoin(resp.url, href)\n",
    "            for href in resp.html.xpath('//div[@class=\"news_title\"]/a/@href')\n",
    "        ],\n",
    "        \"日期\": resp.html.xpath('//font[@class=\"right-more\"]/text()'),\n",
    "    })\n",
    "    list_df_4.append(df_four)\n",
    "\n",
    "# Sort newest first, then renumber the rows. drop=True keeps the per-page row\n",
    "# numbers from turning into a stray 'index' column.\n",
    "df_all_4 = (\n",
    "    pd.concat(list_df_4)\n",
    "    .sort_values(by='日期', ascending=False)\n",
    "    .reset_index(drop=True)\n",
    ")\n",
    "display(df_all_4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "200"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Higher-education news section (/gjdt/): start a new session, fetch the first\n",
    "# listing page and show its HTTP status.\n",
    "session = HTMLSession()\n",
    "r_five = session.get('https://www.nfu.edu.cn/gjdt/index.htm')\n",
    "r_five.status_code"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "26\n"
     ]
    }
   ],
   "source": [
    "# Probe the numbered /gjdt/ listing pages in order and print the first index\n",
    "# whose request does not come back with HTTP 200.\n",
    "for i in range(1, 100):\n",
    "    effective_url = session.get(f'https://www.nfu.edu.cn/gjdt/index{i}.htm')\n",
    "    if effective_url.status_code == 200:\n",
    "        continue\n",
    "    print(i)\n",
    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['https://www.nfu.edu.cn/gjdt/index.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index1.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index2.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index3.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index4.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index5.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index6.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index7.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index8.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index9.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index10.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index11.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index12.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index13.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index14.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index15.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index16.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index17.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index18.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index19.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index20.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index21.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index22.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index23.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index24.htm',\n",
       " 'https://www.nfu.edu.cn/gjdt/index25.htm']"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Listing pages of /gjdt/: the unnumbered first page, then index1 .. index25\n",
    "# (26 is the first index the probe cell printed as missing).\n",
    "url_group_five = ['https://www.nfu.edu.cn/gjdt/index.htm'] + [\n",
    "    f'https://www.nfu.edu.cn/gjdt/index{i}.htm' for i in range(1, 26)\n",
    "]\n",
    "url_group_five"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>index</th>\n",
       "      <th>标题</th>\n",
       "      <th>链接</th>\n",
       "      <th>日期</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>教育部党组《求是》撰文：精心谋划 切实抓好教育系统党史学习教育</td>\n",
       "      <td>{https://www.nfu.edu.cn/gjdt/309be8b078444044b...</td>\n",
       "      <td>2021-04-08</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>教育部长陈宝生：把巩固拓展作为开局之年工作主题，做到6个到位</td>\n",
       "      <td>{https://www.nfu.edu.cn/gjdt/159b20971f8b4051b...</td>\n",
       "      <td>2021-03-20</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>如何建设高质量教育体系？“十四五”规划和2035年远景目标纲要明确了</td>\n",
       "      <td>{https://www.nfu.edu.cn/gjdt/27ba495edc1b49f88...</td>\n",
       "      <td>2021-03-15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>教育部长陈宝生《旗帜》撰文：建设高质量教育体系，加快建成教育强国</td>\n",
       "      <td>{https://www.nfu.edu.cn/gjdt/20dc120c250642cca...</td>\n",
       "      <td>2021-01-05</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>重磅！《推进粤港澳大湾区高等教育合作发展规划》正式印发</td>\n",
       "      <td>{https://www.nfu.edu.cn/gjdt/b43531427fb44695b...</td>\n",
       "      <td>2020-12-22</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>508</th>\n",
       "      <td>8</td>\n",
       "      <td>广东省教育厅：今年毕业生就业形势比去年好</td>\n",
       "      <td>{https://www.nfu.edu.cn/gjdt/3829e4c5df9e460ab...</td>\n",
       "      <td>2014-03-28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>509</th>\n",
       "      <td>9</td>\n",
       "      <td>要求职业“高大上” 高校毕业生择业扎堆致就业难</td>\n",
       "      <td>{https://www.nfu.edu.cn/gjdt/776ebc41fae84b36a...</td>\n",
       "      <td>2014-03-27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>510</th>\n",
       "      <td>10</td>\n",
       "      <td>教育部：预计今年贫困地区农村学生上重点高校的人数将比去年增加10%以上</td>\n",
       "      <td>{https://www.nfu.edu.cn/gjdt/41d339ccb3a0464c9...</td>\n",
       "      <td>2014-03-25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>511</th>\n",
       "      <td>11</td>\n",
       "      <td>学位论文如何才能挤出“水分”</td>\n",
       "      <td>{https://www.nfu.edu.cn/gjdt/1e8fa309bcf847b6a...</td>\n",
       "      <td>2014-03-24</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>512</th>\n",
       "      <td>12</td>\n",
       "      <td>高校低年级学生频繁试水招聘会 专家：鼓励提前预热</td>\n",
       "      <td>{https://www.nfu.edu.cn/gjdt/3f34245a7cb449c99...</td>\n",
       "      <td>2013-03-31</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>513 rows × 4 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "     index                                   标题  \\\n",
       "0        0      教育部党组《求是》撰文：精心谋划 切实抓好教育系统党史学习教育   \n",
       "1        1       教育部长陈宝生：把巩固拓展作为开局之年工作主题，做到6个到位   \n",
       "2        2   如何建设高质量教育体系？“十四五”规划和2035年远景目标纲要明确了   \n",
       "3        3     教育部长陈宝生《旗帜》撰文：建设高质量教育体系，加快建成教育强国   \n",
       "4        4          重磅！《推进粤港澳大湾区高等教育合作发展规划》正式印发   \n",
       "..     ...                                  ...   \n",
       "508      8                 广东省教育厅：今年毕业生就业形势比去年好   \n",
       "509      9              要求职业“高大上” 高校毕业生择业扎堆致就业难   \n",
       "510     10  教育部：预计今年贫困地区农村学生上重点高校的人数将比去年增加10%以上   \n",
       "511     11                       学位论文如何才能挤出“水分”   \n",
       "512     12             高校低年级学生频繁试水招聘会 专家：鼓励提前预热   \n",
       "\n",
       "                                                    链接          日期  \n",
       "0    {https://www.nfu.edu.cn/gjdt/309be8b078444044b...  2021-04-08  \n",
       "1    {https://www.nfu.edu.cn/gjdt/159b20971f8b4051b...  2021-03-20  \n",
       "2    {https://www.nfu.edu.cn/gjdt/27ba495edc1b49f88...  2021-03-15  \n",
       "3    {https://www.nfu.edu.cn/gjdt/20dc120c250642cca...  2021-01-05  \n",
       "4    {https://www.nfu.edu.cn/gjdt/b43531427fb44695b...  2020-12-22  \n",
       "..                                                 ...         ...  \n",
       "508  {https://www.nfu.edu.cn/gjdt/3829e4c5df9e460ab...  2014-03-28  \n",
       "509  {https://www.nfu.edu.cn/gjdt/776ebc41fae84b36a...  2014-03-27  \n",
       "510  {https://www.nfu.edu.cn/gjdt/41d339ccb3a0464c9...  2014-03-25  \n",
       "511  {https://www.nfu.edu.cn/gjdt/1e8fa309bcf847b6a...  2014-03-24  \n",
       "512  {https://www.nfu.edu.cn/gjdt/3f34245a7cb449c99...  2013-03-31  \n",
       "\n",
       "[513 rows x 4 columns]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Scrape every /gjdt/ listing page into a single table of title, link and date.\n",
    "list_df_5 = []\n",
    "# Download all pages up front; url_group_5 holds the responses, one per listing URL.\n",
    "url_group_5 = [session.get(page_url) for page_url in url_group_five]\n",
    "\n",
    "for resp in url_group_5:\n",
    "    df_five = pd.DataFrame({\n",
    "        \"标题\": resp.html.xpath('//div[@class=\"news_title\"]/a/@title'),\n",
    "        # Store one absolute URL string per row. Element.absolute_links returns a\n",
    "        # set, which is why the column used to display as '{https://...}'.\n",
    "        \"链接\": [\n",
    "            urllib.parse.urljoin(resp.url, href)\n",
    "            for href in resp.html.xpath('//div[@class=\"news_title\"]/a/@href')\n",
    "        ],\n",
    "        \"日期\": resp.html.xpath('//font[@class=\"right-more\"]/text()'),\n",
    "    })\n",
    "    list_df_5.append(df_five)\n",
    "\n",
    "# Sort newest first, then renumber the rows. drop=True keeps the per-page row\n",
    "# numbers from turning into a stray 'index' column.\n",
    "df_all_5 = (\n",
    "    pd.concat(list_df_5)\n",
    "    .sort_values(by='日期', ascending=False)\n",
    "    .reset_index(drop=True)\n",
    ")\n",
    "display(df_all_5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write each section's table to its own sheet. `writer` is not created in this\n",
    "# cell; it is expected to be a pd.ExcelWriter from an earlier cell.\n",
    "df_all_1.to_excel(writer, sheet_name=\"学校要闻\")\n",
    "df_all_2.to_excel(writer, sheet_name=\"校园动态\")\n",
    "df_all_3.to_excel(writer, sheet_name=\"通知公告\")\n",
    "df_all_4.to_excel(writer, sheet_name=\"招投标\")\n",
    "df_all_5.to_excel(writer, sheet_name=\"高教动态\")\n",
    "# close() writes the workbook to disk. ExcelWriter.save() was deprecated in\n",
    "# pandas 1.5 and removed in 2.0, so close() is used instead.\n",
    "writer.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
